package cn.xo68.boot.webgather.job;

import cn.xo68.boot.job.JobProvider;
import cn.xo68.boot.job.entity.DistributedLock;
import cn.xo68.boot.job.entity.QuartzJob;
import cn.xo68.boot.job.jdbcstore.lock.DistributedLockManager;
import cn.xo68.boot.webgather.common.GatherStatusEnums;
import cn.xo68.boot.webgather.document.WebGatherContentDoc;
import cn.xo68.boot.webgather.document.WebGatherLinkDoc;
import cn.xo68.boot.webgather.entity.GatherContentPageConfig;
import cn.xo68.boot.webgather.resolve.ContentResolve;
import cn.xo68.boot.webgather.resolve.ResolveFactory;
import cn.xo68.boot.webgather.service.WebGatherContentService;
import cn.xo68.boot.webgather.service.WebGatherLinkService;
import cn.xo68.core.date.DateTime;
import cn.xo68.core.util.JsonUtil;
import org.jsoup.nodes.Element;
import org.quartz.JobExecutionContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.data.domain.Page;
import org.springframework.stereotype.Component;

/**
 * Job provider that gathers the content of web pages.
 *
 * <p>Each run claims a batch of links in the {@code WAITGATHING} state while holding a
 * distributed lock, marks them {@code GATHING}, releases the lock, and then gathers every
 * claimed link, recording {@code FINISH} or {@code FAIL} per link.
 *
 * <p>Example job parameters (JSON):
 * {"jobId":"c527b080766f5851398753bb3d02b9ce","batchGatherCount":3}
 *
 * @author wuxie
 * @date 2018-12-13
 */
@ConditionalOnMissingBean(name = "webContentPageGatherJobProvider")
@Component("webContentPageGatherJobProvider")
public class WebContentPageGatherJobProvider  implements JobProvider {

    private static final Logger logger = LoggerFactory.getLogger(WebContentPageGatherJobProvider.class);

    /** Key of the distributed lock held while a batch of links is being claimed. */
    private static final String LOCK_KEY = "lock:webContentPageGather";

    @Autowired(required = false)
    private ResolveFactory resolveFactory;
    @Autowired
    private JsonUtil jsonUtil;
    @Autowired(required = false)
    private WebGatherLinkService webGatherLinkService;
    @Autowired(required = false)
    private WebGatherContentService webGatherContentService;

    @Autowired
    private DistributedLockManager distributedLockManager;

    /**
     * Runs one gathering pass: parses the job parameters, claims a batch of waiting links and
     * gathers the content page of each claimed link.
     *
     * <p>The method returns without doing any work when the parameters cannot be parsed, when an
     * optional collaborator is not wired, when the lock cannot be obtained, or when no link is
     * waiting.
     *
     * @param context   the Quartz execution context (not used by this provider)
     * @param quartzJob the job definition; its parameters are parsed into a
     *                  {@link GatherContentPageConfig}
     */
    @Override
    public void execute(JobExecutionContext context, QuartzJob quartzJob) {
        GatherContentPageConfig gatherContentPageConfig = jsonUtil.parse(quartzJob.getParameters(), GatherContentPageConfig.class);
        logger.debug("gatherContentPageConfig:  {}", gatherContentPageConfig);
        if (gatherContentPageConfig == null) {
            logger.error("参数不正确或为空");
            return;
        }
        // These collaborators are injected with required = false, so they may be absent.
        // Bail out before claiming anything: otherwise links would be flipped to GATHING
        // (and then FAIL) without any real gathering attempt.
        if (resolveFactory == null || webGatherLinkService == null || webGatherContentService == null) {
            logger.error("Content page gathering skipped, missing collaborator(s): resolveFactory={}, webGatherLinkService={}, webGatherContentService={}",
                    resolveFactory != null, webGatherLinkService != null, webGatherContentService != null);
            return;
        }

        Page<WebGatherLinkDoc> docs = claimBatch(quartzJob, gatherContentPageConfig);
        if (docs == null || !docs.hasContent()) {
            return;
        }

        // Gathering happens outside the lock; only the claim step is serialized.
        for (WebGatherLinkDoc linkDoc : docs.getContent()) {
            processLink(quartzJob, linkDoc);
        }
    }

    /**
     * Claims the next batch of waiting links under the distributed lock and marks them as being
     * gathered. The lock is always released before this method returns.
     *
     * @param quartzJob the running job, used only for the debug log line
     * @param config    the parsed job parameters supplying the batch size
     * @return the claimed page of links, or {@code null} when the lock was not obtained or the
     *         link service returned no page
     */
    private Page<WebGatherLinkDoc> claimBatch(QuartzJob quartzJob, GatherContentPageConfig config) {
        // NOTE(review): the two numeric arguments look like a lock expiry and an acquire
        // timeout in milliseconds - confirm against DistributedLockManager.tryLock.
        DistributedLock lock = distributedLockManager.tryLock(LOCK_KEY, "", 30*1000, 1*1000);
        if (lock == null) {
            logger.info("没有申请到锁，lock_key: {}", LOCK_KEY);
            return null;
        }
        try {
            Page<WebGatherLinkDoc> docs = webGatherLinkService.findTop(GatherStatusEnums.WAITGATHING, config.getBatchGatherCount());
            if (docs == null) {
                return null;
            }
            for (WebGatherLinkDoc gatherDoc : docs.getContent()) {
                gatherDoc.setGatherStatus(GatherStatusEnums.GATHING);
                gatherDoc.setContentStartGatherTime(DateTime.Now().getDate());
            }
            webGatherLinkService.batchUpdateStatus(docs.getContent());
            logger.debug("开始本次内容采集:{}，还剩余: {} 条,本次采集: {}",quartzJob.getJobGroup() +"-"+ quartzJob.getJobName(),docs.getTotalElements(), docs.getContent().size());
            return docs;
        } finally {
            distributedLockManager.unLock(lock.getLockKey());
        }
    }

    /**
     * Gathers a single link and persists its final status. Never throws for a gathering or
     * status-update failure, so one bad link cannot abort the rest of the batch.
     *
     * @param quartzJob the running job
     * @param linkDoc   the claimed link to gather
     */
    private void processLink(QuartzJob quartzJob, WebGatherLinkDoc linkDoc) {
        try {
            gatherPage(quartzJob, linkDoc);
            linkDoc.setGatherStatus(GatherStatusEnums.FINISH);
        } catch (Throwable ex) {
            // gatherPage is declared "throws Throwable", hence the broad catch. Log the cause
            // instead of discarding it so a FAIL status can be diagnosed.
            logger.error("Failed to gather content page, url: {}", linkDoc.getLinkUrl(), ex);
            linkDoc.setGatherStatus(GatherStatusEnums.FAIL);
        }
        linkDoc.setContentEndGatherTime(DateTime.Now().getDate());

        // Kept separate from the gathering try-block: a failing status update must neither
        // relabel a successfully gathered page as FAIL nor stop the remaining links.
        try {
            webGatherLinkService.updateStatus(linkDoc);
        } catch (RuntimeException ex) {
            logger.error("Failed to update gather status, url: {}", linkDoc.getLinkUrl(), ex);
        }
    }

    /**
     * Resolves the title and body of the page behind {@code linkDoc} and stores them as a new
     * {@link WebGatherContentDoc}.
     *
     * @param quartzJob the running job (currently unused)
     * @param linkDoc   the link whose page is gathered; its list-page config supplies the resolve
     *                  type and the title/body selectors
     * @throws IllegalStateException if the link has no list-page config or the title/body
     *                               element cannot be resolved
     * @throws Throwable             anything raised by the resolver or the content service
     */
    private void gatherPage(QuartzJob quartzJob, WebGatherLinkDoc linkDoc) throws Throwable {
        String urlStr = linkDoc.getLinkUrl();
        if (linkDoc.getGatherListPageConfig() == null) {
            throw new IllegalStateException("Link has no list page config, url: " + urlStr);
        }

        ContentResolve contentResolve = resolveFactory.getContentResolve(linkDoc.getGatherListPageConfig().getResolveType(), urlStr);

        // Presumably getElement returns null when nothing matches - fail with a clear message
        // instead of a bare NullPointerException.
        Element titleElement = contentResolve.getElement(linkDoc.getGatherListPageConfig().getContentTitle());
        if (titleElement == null) {
            throw new IllegalStateException("Title element not found, url: " + urlStr);
        }
        Element bodyElement = contentResolve.getElement(linkDoc.getGatherListPageConfig().getContentBody());
        if (bodyElement == null) {
            throw new IllegalStateException("Body element not found, url: " + urlStr);
        }

        WebGatherContentDoc contentDoc = new WebGatherContentDoc();
        contentDoc.setContentTitle(titleElement.text());
        contentDoc.setContentBody(bodyElement.text());
        contentDoc.setLinkUrl(urlStr);
        contentDoc.setContentGatherTime(DateTime.Now().getDate());
        webGatherContentService.insert(contentDoc);
    }
}
